********************************************************************************
** 	TITLE:		ca2004_polls		     	                                  ** 	
**	AUTHOR:	    Philippe Mongrain                                             **
**	DATE:		October 2022 					                              **	
**  VERSION:	Stata 16					                                  **	
********************************************************************************

* Interpreter version: run everything below under Stata 16.0 semantics

version 16.0

* Load the master poll file; keep only rows for the 2004 election whose
* note code is not 99

use "Canada_Federal_Election_Polls_May_1945_July_2019.dta", clear

keep if note != 99 & electionyear == 2004

* Convert the survey date and the election date from "YMD" strings to
* Stata daily dates (%td), keeping the original variable names

foreach v in polldate electiondate {
	generate `v'_td = date(`v', "YMD")
	format %td `v'_td
	drop `v'
	rename `v'_td `v'
}

* Days between the poll and the election (positive = poll precedes election)

generate time = electiondate - polldate

* Discard polls dated on election day itself

keep if time != 0

* Generate mean vote intention value by day
*
* The daily means are written straight to the v_ stub so that THEY are what
* gets reshaped into the long "vote" variable.  Previously the means were
* computed but discarded, and the raw figures of whichever poll happened to
* survive the de-duplication were used instead; because Stata orders tied
* observations randomly when sorting, that poll could differ between runs.
* egen's mean() ignores missing values, so a party's daily mean is missing
* only when no poll fielded that day reports it.

bysort polldate : egen v_lpc    = mean(lpc_int)
bysort polldate : egen v_cpc    = mean(cpc_int)
bysort polldate : egen v_pc     = mean(pc_int)
bysort polldate : egen v_pccpc  = mean(pc_cpc_int)
bysort polldate : egen v_reform = mean(reform_int)
bysort polldate : egen v_ndp    = mean(ndp_int)
bysort polldate : egen v_bq     = mean(bq_int)
bysort polldate : egen v_gpc    = mean(green_int)
bysort polldate : egen v_ppc    = mean(ppc_int)

* Drop duplicates
*
* Keep one row per survey date.  The v_ means are constant within a date, so
* it does not matter which row survives.  NOTE: "firm" on the surviving row
* is that of an arbitrary poll fielded that day.

duplicates drop polldate, force

* Reshape the dataset: one row per survey date and party

reshape long v_, i(polldate) j(party) string

rename v_ vote

keep polldate electiondate party vote firm time

order firm polldate electiondate time party vote

* Generate rank of parties
*
* The separate "pc" and "cpc" series are discarded; the combined "pccpc"
* series is the one that remains in the ranking.

drop if party == "pc" | party == "cpc"

* Rank parties within each poll date from highest to lowest vote share.
* The within-group order is stated explicitly in the bysort parentheses
* because Stata's sort does not preserve a previous ordering among tied
* keys: re-sorting with a bare "bysort polldate :" after a gsort can
* scramble the descending-vote order.  Sorting on -vote keeps missing
* shares last (the negation of missing is missing, which sorts highest);
* party name breaks exact ties so the ranking is reproducible.

tempvar negvote
generate double `negvote' = -vote

bysort electiondate polldate (`negvote' party) : gen rank = _n

drop `negvote'

* Top three parties of each poll, read off the rank ordering

bysort electiondate polldate (rank) : gen winner = party[1]
by electiondate polldate : gen runnerup = party[2]
by electiondate polldate : gen thirdplace = party[3]

* Generate poll margin: lead of the first-ranked over the second-ranked party
* (missing when either share is missing)

by electiondate polldate : gen pollmar = vote[1] - vote[2]

* Drop duplicates: winner, runnerup, thirdplace and pollmar are constant
* within a poll date, so keep only the first-ranked row of each group

keep if rank == 1

* Misleading poll
*
* For the 28 June 2004 election a poll is coded 0 (not misleading) when it
* places "lpc" first with a lead of at least one point, and 1 otherwise.
* Stata treats missing as larger than any number, so "pollmar >= 1" alone is
* TRUE when pollmar is missing; the !missing() guards keep polls without a
* computable margin from being classified at all (misleading stays missing).

gen misleading = .

replace misleading = 0 if winner == "lpc" & pollmar >= 1 & !missing(pollmar) & electiondate == td(28jun2004)
replace misleading = 1 if misleading != 0 & !missing(pollmar) & electiondate == td(28jun2004)

* Save
*
* Remove election-day polls and rows whose poll-to-election distance could
* not be computed, then keep only the analysis variables.

drop if time == 0 | missing(time)

keep polldate pollmar misleading time

save "ca2004_polls.dta", replace